import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimage
from moviepy.editor import VideoFileClip
%matplotlib inline
# sample calibration images
import random
# Calibration files are named calibration1.jpg .. calibration20.jpg, so draw
# the index from [1, 20] (the old randint(0, 20) could pick the nonexistent
# calibration0.jpg and crash imread).
index = random.randint(1, 20)
name = 'camera_cal/calibration' + str(index) + '.jpg'
img = mpimage.imread(name)
plt.suptitle('Random Calibration Image')
plt.imshow(img)
I start by preparing "object points", which will be the (x, y, z) coordinates of the chessboard corners in the world. Here I am assuming the chessboard is fixed on the (x, y) plane at z=0, such that the object points are the same for each calibration image. Thus, objp is just a replicated array of coordinates, and objpts will be appended with a copy of it every time I successfully detect all chessboard corners in a test image. imgpts will be appended with the (x, y) pixel position of each of the corners in the image plane with each successful chessboard detection.
I then used the output objpts and imgpts to compute the camera calibration and distortion coefficients using the cv2.calibrateCamera() function. I applied this distortion correction to the test image using the cv2.undistort() function and obtained the results. Sample images of these are shown in the code cells below.
import glob
# Prepare the object-point grid (0,0,0), (1,0,0) ... (8,5,0): the chessboard
# is assumed fixed on the z=0 plane, so the same grid is reused per image.
objp = np.zeros((6*9, 3), np.float32)
objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)
objpts = []  # 3d points in real-world space
imgpts = []  # 2d points in image plane
images = glob.glob('camera_cal/calibration*.jpg')
for i, name in enumerate(images):
    img = cv2.imread(name)
    # convert from BGR to gray for the corner detector
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # find the 9x6 inner chessboard corners
    ret, corners = cv2.findChessboardCorners(gray, (9, 6), None)
    # record object points & image points only when all corners were found
    if ret:
        objpts.append(objp)
        imgpts.append(corners)
        # draw & display the detected corners (last image wins in the plot)
        cv2.drawChessboardCorners(img, (9, 6), corners, ret)
        plt.suptitle('Calibrated Image')
        plt.imshow(img)
import pickle
# Test undistortion on an image
img = cv2.imread('camera_cal/calibration1.jpg')
# image size in (width, height) order, as cv2.calibrateCamera expects
img_size = (img.shape[1], img.shape[0])
# Do camera calibration given object points and image points
# NOTE: mtx/dist become module-level globals that undistort() below reads
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpts, imgpts, img_size,None,None)
dst = cv2.undistort(img, mtx, dist, None, mtx)
# Save the camera calibration result for later use (we won't worry about rvecs / tvecs)
dist_pickle = {}
dist_pickle["mtx"] = mtx
dist_pickle["dist"] = dist
pickle.dump( dist_pickle, open( "calib_pickle.p", "wb" ) )
# Visualize undistortion
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
ax1.imshow(img)
ax1.set_title('Original Image', fontsize=30)
ax2.imshow(dst)
ax2.set_title('Undistorted Image', fontsize=30)
The following cell includes various steps/methods used in the eventual pipeline.
# Sample image for testing various steps of the pipeline
# sample_img = cv2.imread('test_images/straight_lines1.jpg')
sample_img = cv2.imread('test_images/test4.jpg')
# sample_img = cv2.imread('test_images/test2.jpg')
# sample_img = cv2.imread('vid_frame_10/frame0.jpg')
# sample_img = cv2.imread('fram_22/frame2.jpg')
# OpenCV loads BGR; convert to RGB so matplotlib displays the colors correctly
sample_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB)
print('test4.jpg')
plt.imshow(sample_img)
# undistort the images passed to this function using the computed matrix (mtx) and
# return the image
def undistort(img):
    """Undistort img using the module-level calibration results (mtx, dist).

    NOTE: mtx and dist come from the camera-calibration cell above; this
    function must only be called after that cell has run.
    """
    return cv2.undistort(img, mtx, dist, None, mtx)
# testing the undistort function on a sample image.
undistorted = undistort(sample_img)
# show the original and undistorted images side by side
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
ax1.imshow(sample_img)
ax1.set_title('Original Image', fontsize=30)
ax2.imshow(undistorted)
ax2.set_title('Undistorted Image', fontsize=30)
# absolute sobel mask (gradient) applied on an image with threshold in x or y direction
def abs_sobel_thresh(img, orient='x', ksize=3, thresh_min=0, thresh_max=255):
    """Binary mask of the scaled absolute Sobel gradient in x or y.

    Bug fix: `ksize` was accepted but never forwarded to cv2.Sobel, so every
    caller silently got the default 3x3 kernel; it is now passed through.

    img: single-channel image. orient: 'x' for horizontal gradient, anything
    else for vertical. Returns a uint8 mask with 1 where the scaled gradient
    magnitude lies in [thresh_min, thresh_max].
    """
    if orient == 'x':
        sobel = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=ksize)
    else:
        sobel = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=ksize)
    abs_sobel = np.absolute(sobel)
    # rescale so the strongest gradient maps to 255
    scaled_sobel = np.uint8(255 * abs_sobel / np.max(abs_sobel))
    mask = np.zeros_like(scaled_sobel)
    mask[(scaled_sobel >= thresh_min) & (scaled_sobel <= thresh_max)] = 1
    return mask
# thresholding the overall magnitude of the gradient.
def mag_threshold(img, sobel_kernel=3, mag_thresh=(0, 255)):
    """Binary mask where the overall gradient magnitude is within mag_thresh.

    img: single-channel image. Returns a uint8 mask with 1 where the
    255-rescaled gradient magnitude lies in [mag_thresh[0], mag_thresh[1]].
    """
    gx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=sobel_kernel)
    gy = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=sobel_kernel)
    magnitude = np.sqrt(gx ** 2 + gy ** 2)
    # rescale so the strongest gradient maps to 255
    scaled = np.uint8(magnitude / (np.max(magnitude) / 255))
    lo, hi = mag_thresh
    mask = np.zeros_like(scaled)
    mask[(scaled >= lo) & (scaled <= hi)] = 1
    return mask
# thresholding the direction of the gradient.
def dir_threshold(img, sobel_kernel=3, thresh=(0, np.pi/2)):
    """Binary mask where the gradient direction lies within thresh (radians).

    Bug fixes: the original read the module-level global `gray` instead of
    its `img` parameter (so the argument was ignored entirely), and shadowed
    the builtin `dir`. A 3-channel input is now converted to grayscale so the
    function is safe to call with either color or gray images.

    Returns a float mask with 1 where arctan2(|gy|, |gx|) is in
    [thresh[0], thresh[1]].
    """
    if img.ndim == 3:
        # gradients are direction-thresholded on intensity, not per channel
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=sobel_kernel)
    sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=sobel_kernel)
    direction = np.arctan2(np.absolute(sobely), np.absolute(sobelx))
    mask = np.zeros_like(direction)
    mask[(direction >= thresh[0]) & (direction <= thresh[1])] = 1
    return mask
# apply color mask thresholding to image
def color_threshold(img, thresh=(0, 255)):
    """Binary mask: 1 where the pixel value is in [thresh[0], thresh[1]]."""
    lo, hi = thresh
    within = (img >= lo) & (img <= hi)
    mask = np.zeros_like(img)
    mask[within] = 1
    return mask
# Random testing of images with various color thresholds (H, L, S, Gray & R)
# NOTE: `hls` and `gray` are module-level here and get reused by later cells
hls = cv2.cvtColor(undistorted, cv2.COLOR_RGB2HLS)
gray = cv2.cvtColor(undistorted, cv2.COLOR_RGB2GRAY)
h_channel = hls[:,:,0]
l_channel = hls[:,:,1]
s_channel = hls[:,:,2]
h_thresh = color_threshold(h_channel, (15, 70))
l_thresh = color_threshold(l_channel, (100, 140))
s_thresh = color_threshold(s_channel, (140, 255))
gray_thresh = color_threshold(gray, (200,255))
# R channel of the RGB image (picks up white lane lines well)
r = undistorted[:,:,0]
r_thresh = color_threshold(r, (200, 255))
# plot each channel next to its thresholded binary version
f, axs = plt.subplots(5, 2, figsize=(50,40))
f.subplots_adjust(hspace = .5, wspace=.3)
axs = axs.ravel()
axs[0].imshow(l_channel, cmap='gray')
axs[0].set_title('l channel', fontsize=20)
axs[1].imshow(l_thresh, cmap='gray')
axs[1].set_title('l threshold', fontsize=20)
axs[2].imshow(s_channel, cmap='gray')
axs[2].set_title('s channel', fontsize=20)
axs[3].imshow(s_thresh, cmap='gray')
axs[3].set_title('s threshold', fontsize=20)
axs[4].imshow(h_channel, cmap='gray')
axs[4].set_title('h channel', fontsize=20)
axs[5].imshow(h_thresh, cmap='gray')
axs[5].set_title('h threshold', fontsize=20)
axs[6].imshow(gray, cmap='gray')
axs[6].set_title('gray', fontsize=20)
axs[7].imshow(gray_thresh, cmap='gray')
axs[7].set_title('gray threshold', fontsize=20)
axs[8].imshow(r, cmap='gray')
axs[8].set_title('R channel', fontsize=20)
axs[9].imshow(r_thresh, cmap='gray')
axs[9].set_title('R threshold', fontsize=20)
# test sobel, magnitude & direction gradients on images
# (ksize=15 smooths the gradients; thresholds were picked by trial and error)
abs_sobel_x = abs_sobel_thresh(gray, ksize=15, thresh_min=50 , thresh_max=255)
abs_sobel_y = abs_sobel_thresh(gray, 'y', ksize=15, thresh_min=50 , thresh_max=255)
mag_thresh = mag_threshold(gray, sobel_kernel=15, mag_thresh=(50,255))
dir_thresh = dir_threshold(gray, sobel_kernel=15, thresh=(0.7,1.2))
# plot the gray source next to each gradient mask
f, axs = plt.subplots(4, 2, figsize=(50,40))
axs = axs.ravel()
f.subplots_adjust(hspace = .5, wspace=.3)
axs[0].imshow(gray, cmap='gray')
axs[0].set_title('gray image', fontsize=20)
axs[1].imshow(abs_sobel_x, cmap='gray')
axs[1].set_title('abs sobel x', fontsize=20)
axs[2].imshow(gray, cmap='gray')
axs[2].set_title('gray image', fontsize=20)
axs[3].imshow(abs_sobel_y, cmap='gray')
axs[3].set_title('abs sobel y', fontsize=20)
axs[4].imshow(gray, cmap='gray')
axs[4].set_title('gray image', fontsize=20)
axs[5].imshow(mag_thresh, cmap='gray')
axs[5].set_title('magnitude of gradient', fontsize=20)
axs[6].imshow(gray, cmap='gray')
axs[6].set_title('gray image', fontsize=20)
axs[7].imshow(dir_thresh, cmap='gray')
axs[7].set_title('direction of gradient', fontsize=20)
The code for my perspective transform includes a function called 'warp_image',
The warp_image() function takes as inputs an image (img), as well as source (src) and destination (dst) points. I have hardcoded the source and destination points based on trial and error for now.
The function and its sample use (with example images) is in the 2 code cells below.
I verified that my perspective transform was working as expected by drawing the src and dst points onto a test image and its warped counterpart to verify that the lines appear parallel in the warped image.
# perspective transform - warp image (birds view)
def warp_image(img, src=None, dst=None):
    """Warp img to a top-down ("birds eye") view.

    Bug fix: the src/dst parameters were previously ignored and overwritten
    with hardcoded points. They are now honored; the old hardcoded points
    remain as the defaults (used when None is passed), so existing callers
    that pass the identical module-level src/dst see no behavior change.

    Returns (warped, M, Minv) where M is the forward perspective matrix and
    Minv maps back to the camera perspective.
    """
    h, w = img.shape[:2]
    if src is None:
        # hand-tuned road trapezoid in the original camera view
        src = np.float32([(575, 464),
                          (707, 464),
                          (258, 682),
                          (1049, 682)])
    if dst is None:
        # corresponding rectangle in the top-down view
        dst = np.float32([(450, 0),
                          (w - 450, 0),
                          (450, h),
                          (w - 450, h)])
    M = cv2.getPerspectiveTransform(src, dst)
    Minv = cv2.getPerspectiveTransform(dst, src)
    warped = cv2.warpPerspective(img, M, (w, h), flags=cv2.INTER_NEAREST)
    return warped, M, Minv
# testing warp perspective on an undistorted sample image.
h,w = sample_img.shape[:2]
# define source and destination points for transform
# NOTE: these values mirror the ones hardcoded in warp_image — keep in sync
src = np.float32([(575,464),
(707,464),
(258,682),
(1049,682)])
dst = np.float32([(450,0),
(w-450,0),
(450,h),
(w-450,h)])
sampleImg_warp, M_, Minv_ = warp_image(undistorted, src, dst)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
ax1.imshow(undistorted)
ax1.set_title('Undistorted Image', fontsize=30)
# trace the src quadrilateral on the original and dst on the warped image
# (corner order: top-left, bottom-left, bottom-right, top-right, close loop)
x = [src[0][0],src[2][0],src[3][0],src[1][0],src[0][0]]
y = [src[0][1],src[2][1],src[3][1],src[1][1],src[0][1]]
x1 = [dst[0][0],dst[2][0],dst[3][0],dst[1][0],dst[0][0]]
y1 = [dst[0][1],dst[2][1],dst[3][1],dst[1][1],dst[0][1]]
ax1.plot(x, y, color='#0000FF', alpha=0.4, linewidth=3, solid_capstyle='round', zorder=2)
ax2.plot(x1, y1, color='#FF00FF', alpha=0.4, linewidth=3, solid_capstyle='round', zorder=2)
ax1.set_ylim([h,0])
ax1.set_xlim([0,w])
ax2.imshow(sampleImg_warp)
ax2.set_title('Unwarped Image', fontsize=30)
The pipeline takes in an image. The next step is to undistort the image. Then I apply a combination of the following color thresholds: Saturation, Red. I also apply a combination of absolute sobel gradient in X and Y direction, magnitude & direction gradients to the image to get the final detected image. Saturation threshold helps detect yellow lines very clearly. The R channel picks up the white lines pretty well. The color channels with the direction, magnitude & abs gradient in X & Y direction help filter out the shadows (and other noise), putting the focus on the lane pixels. The pipeline steps are defined in the code cell below in the method 'pipeline()'. The cell after that shows the sample test images after the pipeline has been applied.
# The processing pipeline that every video frame will go through
def pipeline(img):
    """Threshold a frame into a binary lane mask in top-down view.

    Steps: undistort -> perspective warp -> combine color thresholds
    (S channel for yellow lines, R channel for white lines) with gradient
    thresholds (abs Sobel x & y, magnitude, direction) on the warped image.

    Returns (combined_binary, M, Minv) with the perspective matrices so the
    result can later be unwarped back onto the frame.
    """
    undistorted_img = undistort(img)
    # warp first so all thresholds operate in the top-down view
    img_warped, M, Minv = warp_image(undistorted_img, src, dst)
    hls = cv2.cvtColor(img_warped, cv2.COLOR_RGB2HLS)
    gray = cv2.cvtColor(img_warped, cv2.COLOR_RGB2GRAY)
    # color thresholds: saturation picks up yellow lines, red picks up white
    s_thresh = color_threshold(hls[:, :, 2], (160, 255))
    r_thresh = color_threshold(img_warped[:, :, 0], (200, 255))
    # gradient thresholds, all computed on the grayscale warped image.
    # Bug fix: dir_threshold is now given the grayscale image explicitly
    # (it used to be handed the color image and relied on a stale global).
    dir_thresh = dir_threshold(gray, sobel_kernel=15, thresh=(0.7, 1.2))
    abs_sobel_x = abs_sobel_thresh(gray, ksize=15, thresh_min=50, thresh_max=255)
    abs_sobel_y = abs_sobel_thresh(gray, 'y', ksize=15, thresh_min=50, thresh_max=255)
    mag_thresh = mag_threshold(gray, sobel_kernel=15, mag_thresh=(50, 255))
    # (the L/H/gray thresholds the original computed were dropped — they
    # never contributed to the combined mask below)
    combined_binary = np.zeros_like(gray)
    combined_binary[((s_thresh == 1) & (r_thresh == 1))
                    | ((abs_sobel_x == 1) & (abs_sobel_y == 1))
                    | ((dir_thresh == 1) & (mag_thresh == 1))] = 1
    return combined_binary, M, Minv
# Run the pipeline over every test image and show original vs. binary output.
# (indentation of the loop body was restored; the manual `i` counter was
# replaced with enumerate)
images = glob.glob('./test_images/*.jpg')
size = len(images)
f, axs = plt.subplots(size, 2, figsize=(30, 70))
f.subplots_adjust(hspace=.5, wspace=.3)
axs = axs.ravel()
for i, image in enumerate(images):
    img = cv2.imread(image)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_processed, M_, Minv_ = pipeline(img)
    # even slots: original frame, odd slots: thresholded binary
    axs[2 * i].imshow(img, cmap='gray')
    axs[2 * i].set_title('original-' + image, fontsize=20)
    axs[2 * i + 1].imshow(img_processed, cmap='gray')
    axs[2 * i + 1].set_title('transformed', fontsize=20)
# plot a histogram of the bottom half of a binary image to locate lane-line peaks
def show_hist(img_processed):
    """Plot column-wise sums of the bottom half of a binary warped image."""
    bottom_half = img_processed[img_processed.shape[0] // 2:, :]
    plt.figure()
    plt.plot(np.sum(bottom_half, axis=0))
We take the histogram of a given frame of an image. And find the 2 largest peaks in an image in a given search area around the center of the image (from the first quarter of the image to the last quarter of the image). We then use this to find the max value of the histogram peaks that represent the left and right line base start point. Now we use a sliding window approach to build windows around lane centers to detect lane points all the way to the top of the frame. We do this for 10 windows. The function is in the code cell below. In the next frame of video we don't need to do a blind search again, but instead we search in a margin around the previous line position as described by the method 'polyfit_from_prev_frame()' in a code cell below. Visualize methods for each of the approaches are also present to test them along with the results on sample images.
# sliding window approach to identify lane line pixels from a given frame.
def sliding_window_fit(binary_warped):
    """Blind lane-line search on a binary warped (top-down) image.

    Finds the left/right line base positions from a histogram of the bottom
    half of the image, then walks 10 sliding windows up the frame collecting
    lane pixels, and fits a 2nd-order polynomial x = f(y) to each line.

    Fixes: the removed `np.int` alias (gone since NumPy 1.24) is replaced
    with the builtin int, and the per-frame debug print was dropped.

    Returns (left_fit, right_fit, left_lane_inds, right_lane_inds, out_img)
    where out_img is an RGB visualization with the search windows drawn.
    """
    # Histogram of the bottom half: column sums peak at the lane lines
    histogram = np.sum(binary_warped[int(binary_warped.shape[0] / 2):, :], axis=0)
    # Output image to draw the search windows on
    out_img = np.dstack((binary_warped, binary_warped, binary_warped)) * 255
    # Restrict the base search to the middle half of the image
    # (first quarter .. last quarter) to avoid picking up edge noise
    midpoint = int(histogram.shape[0] / 2)
    start_quarter_mark = int(midpoint / 2)
    end_quarter_mark = int(midpoint + start_quarter_mark)
    leftx_base = np.argmax(histogram[start_quarter_mark:midpoint]) + start_quarter_mark
    rightx_base = np.argmax(histogram[midpoint:end_quarter_mark]) + midpoint
    nwindows = 10  # number of sliding windows
    window_height = int(binary_warped.shape[0] / nwindows)
    # x/y positions of all nonzero pixels in the image
    nonzero = binary_warped.nonzero()
    nonzeroy = np.array(nonzero[0])
    nonzerox = np.array(nonzero[1])
    # Current positions, updated window by window
    leftx_current = leftx_base
    rightx_current = rightx_base
    margin = 80  # window half-width
    minpix = 50  # min pixels found to recenter the next window
    left_lane_inds = []
    right_lane_inds = []
    # Step through the windows one by one, bottom to top
    for window in range(nwindows):
        win_y_low = binary_warped.shape[0] - (window + 1) * window_height
        win_y_high = binary_warped.shape[0] - window * window_height
        win_xleft_low = leftx_current - margin
        win_xleft_high = leftx_current + margin
        win_xright_low = rightx_current - margin
        win_xright_high = rightx_current + margin
        # Draw the windows on the visualization image
        cv2.rectangle(out_img, (win_xleft_low, win_y_low), (win_xleft_high, win_y_high), (0, 255, 0), 2)
        cv2.rectangle(out_img, (win_xright_low, win_y_low), (win_xright_high, win_y_high), (0, 255, 0), 2)
        # Nonzero pixels that fall inside each window
        good_left_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) &
                          (nonzerox >= win_xleft_low) & (nonzerox < win_xleft_high)).nonzero()[0]
        good_right_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) &
                           (nonzerox >= win_xright_low) & (nonzerox < win_xright_high)).nonzero()[0]
        left_lane_inds.append(good_left_inds)
        right_lane_inds.append(good_right_inds)
        # Recenter the next window on the mean pixel x when enough were found
        if len(good_left_inds) > minpix:
            leftx_current = int(np.mean(nonzerox[good_left_inds]))
        if len(good_right_inds) > minpix:
            rightx_current = int(np.mean(nonzerox[good_right_inds]))
    left_lane_inds = np.concatenate(left_lane_inds)
    right_lane_inds = np.concatenate(right_lane_inds)
    # Extract left and right line pixel positions
    leftx = nonzerox[left_lane_inds]
    lefty = nonzeroy[left_lane_inds]
    rightx = nonzerox[right_lane_inds]
    righty = nonzeroy[right_lane_inds]
    # Fit a second order polynomial x = f(y) to each line
    left_fit = np.polyfit(lefty, leftx, 2)
    right_fit = np.polyfit(righty, rightx, 2)
    return left_fit, right_fit, left_lane_inds, right_lane_inds, out_img
MARGIN = 80
# determine lane pixels from a previously detected lane line fit. (Used by subsequent frames to speed up the detection)
def polyfit_from_prev_frame(binary_warped, left_fit, right_fit):
    """Refit both lane lines by searching +/- MARGIN around the previous fits.

    binary_warped: binary top-down lane mask for the new frame.
    left_fit/right_fit: 2nd-order coefficients from the previous frame.
    Returns (left_fit, right_fit, left_lane_inds, right_lane_inds) where the
    index arrays are boolean masks over the image's nonzero pixels.
    """
    rows, cols = binary_warped.nonzero()
    ys = np.array(rows)
    xs = np.array(cols)

    def _near(fit):
        # boolean mask of nonzero pixels inside the corridor around the fit
        center = fit[0] * ys ** 2 + fit[1] * ys + fit[2]
        return (xs > center - MARGIN) & (xs < center + MARGIN)

    left_lane_inds = _near(left_fit)
    right_lane_inds = _near(right_fit)
    # Fit a fresh second order polynomial to the pixels in each corridor
    left_fit = np.polyfit(ys[left_lane_inds], xs[left_lane_inds], 2)
    right_fit = np.polyfit(ys[right_lane_inds], xs[right_lane_inds], 2)
    return left_fit, right_fit, left_lane_inds, right_lane_inds
# visualize the polynomial and coordinates identified by the sliding window fit method above.
def visualize_sliding_window(warped_img, left_fit, right_fit, left_lane_inds, right_lane_inds, out_img):
    """Overlay the fitted polynomials and colored lane pixels on out_img."""
    # evaluate both fits at every image row
    ploty = np.linspace(0, warped_img.shape[0] - 1, warped_img.shape[0])
    left_fitx = np.polyval(left_fit, ploty)
    right_fitx = np.polyval(right_fit, ploty)
    # paint the left-lane pixels red and the right-lane pixels blue
    rows, cols = warped_img.nonzero()
    rows = np.array(rows)
    cols = np.array(cols)
    out_img[rows[left_lane_inds], cols[left_lane_inds]] = [255, 0, 0]
    out_img[rows[right_lane_inds], cols[right_lane_inds]] = [0, 0, 255]
    plt.figure()
    plt.imshow(out_img)
    plt.plot(left_fitx, ploty, color='yellow')
    plt.plot(right_fitx, ploty, color='yellow')
    plt.xlim(0, 1280)
    plt.ylim(720, 0)
# sample_img = cv2.imread('test_images/straight_lines1.jpg')
# run the full pipeline + blind sliding-window search on one test image
sample_img = cv2.imread('test_images/test5.jpg')
# sample_img = cv2.imread('vid_frames/frame8.jpg')
# sample_img = cv2.imread('vid_frame_10/frame13.jpg')
# sample_img = cv2.imread('frame_41/frame7.jpg')
# sample_img = cv2.imread('fram_22/frame0.jpg')
sample_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB)
img_processed, M_orig, Minv_orig = pipeline(sample_img)
plt.imshow(sample_img)
show_hist(img_processed)
# NOTE: left_fit_orig/right_fit_orig are reused by later cells as the seed fits
left_fit_orig, right_fit_orig, left_lane_inds, right_lane_inds, out_img = sliding_window_fit(img_processed)
visualize_sliding_window(img_processed, left_fit_orig, right_fit_orig, left_lane_inds, right_lane_inds, out_img)
# visualize the lane line for a given frame from the detection method that uses the previous lane line info.
def visualize_sliding_window_prev_frame(binary_warped, left_fit, right_fit, left_lane_inds, right_lane_inds):
    """Show lane pixels plus the +/-MARGIN search corridor around each fit."""
    # evaluate both fits at every image row
    ploty = np.linspace(0, binary_warped.shape[0] - 1, binary_warped.shape[0])
    left_fitx = np.polyval(left_fit, ploty)
    right_fitx = np.polyval(right_fit, ploty)
    rows, cols = binary_warped.nonzero()
    rows = np.array(rows)
    cols = np.array(cols)
    # base image with lane pixels colored, plus a blank layer for the corridor
    out_img = np.dstack((binary_warped, binary_warped, binary_warped)) * 255
    window_img = np.zeros_like(out_img)
    out_img[rows[left_lane_inds], cols[left_lane_inds]] = [255, 0, 0]
    out_img[rows[right_lane_inds], cols[right_lane_inds]] = [0, 0, 255]

    def _corridor(fitx):
        # closed polygon: down the inner edge, back up the (flipped) outer edge
        inner = np.array([np.transpose(np.vstack([fitx - MARGIN, ploty]))])
        outer = np.array([np.flipud(np.transpose(np.vstack([fitx + MARGIN, ploty])))])
        return np.hstack((inner, outer))

    # draw both search corridors onto the blank layer
    cv2.fillPoly(window_img, np.int_([_corridor(left_fitx)]), (0,255, 0))
    cv2.fillPoly(window_img, np.int_([_corridor(right_fitx)]), (0,255, 0))
    result = cv2.addWeighted(out_img, 1, window_img, 0.3, 0)
    plt.figure()
    plt.imshow(result)
    plt.plot(left_fitx, ploty, color='yellow')
    plt.plot(right_fitx, ploty, color='yellow')
    plt.xlim(0, 1280)
    plt.ylim(720, 0)
# testing sliding window lane detection using previous frame info.
# sample_img_next_frame = cv2.imread('test_images/straight_lines2.jpg')
# sample_img_next_frame = cv2.imread('vid_frame_10/frame.jpg')
# sample_img_next_frame = cv2.imread('frame_41/frame1.jpg')
sample_img_next_frame = cv2.imread('test_images/test6.jpg')
sample_img_next_frame = cv2.cvtColor(sample_img_next_frame, cv2.COLOR_BGR2RGB)
img_processed_next_frame, M_prev, Minv_prev = pipeline(sample_img_next_frame)
plt.imshow(sample_img_next_frame)
# reuse the fits from the sliding-window cell above (left_fit_orig /
# right_fit_orig) as the "previous frame" seed for the margin search
left_fit_next_frame, right_fit_next_frame, left_lane_inds, right_lane_inds = polyfit_from_prev_frame(img_processed_next_frame, left_fit_orig, right_fit_orig)
visualize_sliding_window_prev_frame(img_processed_next_frame, left_fit_next_frame, right_fit_next_frame, left_lane_inds, right_lane_inds)
I computed the radius of curvature based on the following article. 'y_eval' used in the formula is the position within the image upon which the curvature calculation is based (the bottom-most y — the position of the car in the image). The left and right curve radii computed are in pixel space, so we calculate new coefficients for the real world space by multiplying the coefficients by the conversion factors (ym_per_pix, xm_per_pix).
The position of the vehicle with respect to the center is computed by first taking the mean of the x & y intercepts at the base of the image and subtracting that from the car position (where camera is mounted - center of the base of the image). This is the distance of the car from the center. The function is in the code cell below.
# computing the radius of curvature
def line_curvature(binary_warped, left_fit, right_fit):
ploty = np.linspace(0, binary_warped.shape[0]-1, binary_warped.shape[0])
y_eval = np.max(ploty)
left_curverad = ((1 + (2*left_fit[0]*y_eval + left_fit[1])**2)**1.5) / np.absolute(2*left_fit[0])
right_curverad = ((1 + (2*right_fit[0]*y_eval + right_fit[1])**2)**1.5) / np.absolute(2*right_fit[0])
print('in pixels', left_curverad, right_curverad)
nonzero = binary_warped.nonzero()
nonzeroy = np.array(nonzero[0])
nonzerox = np.array(nonzero[1])
left_lane_inds = ((nonzerox > (left_fit[0]*(nonzeroy**2) + left_fit[1]*nonzeroy + left_fit[2] - MARGIN)) & (nonzerox < (left_fit[0]*(nonzeroy**2) + left_fit[1]*nonzeroy + left_fit[2] + MARGIN)))
right_lane_inds = ((nonzerox > (right_fit[0]*(nonzeroy**2) + right_fit[1]*nonzeroy + right_fit[2] - MARGIN)) & (nonzerox < (right_fit[0]*(nonzeroy**2) + right_fit[1]*nonzeroy + right_fit[2] + MARGIN)))
# Again, extract left and right line pixel positions
leftx = nonzerox[left_lane_inds]
lefty = nonzeroy[left_lane_inds]
rightx = nonzerox[right_lane_inds]
righty = nonzeroy[right_lane_inds]
# Define conversions in x and y from pixels space to meters
ym_per_pix = 30/720 # meters per pixel in y dimension
xm_per_pix = 3.7/700 # meters per pixel in x dimension
# print((ploty*ym_per_pix).shape, len(ploty*ym_per_pix))
# Fit new polynomials to x,y in world space
left_fit_cr = np.polyfit(lefty*ym_per_pix, leftx*xm_per_pix, 2)
right_fit_cr = np.polyfit(righty*ym_per_pix, rightx*xm_per_pix, 2)
# Calculate the new radii of curvature
left_curverad = ((1 + (2*left_fit_cr[0]*y_eval*ym_per_pix + left_fit_cr[1])**2)**1.5) / np.absolute(2*left_fit_cr[0])
right_curverad = ((1 + (2*right_fit_cr[0]*y_eval*ym_per_pix + right_fit_cr[1])**2)**1.5) / np.absolute(2*right_fit_cr[0])
# position of the car.
car_position = binary_warped.shape[1]/2
img_h = binary_warped.shape[0]
l_xintercept = left_fit[0]*h**2 + left_fit[1]*h + left_fit[2]
r_xintercept = right_fit[0]*h**2 + right_fit[1]*h + right_fit[2]
center = (car_position - ((l_xintercept + r_xintercept)/2)) * xm_per_pix
# Now our radius of curvature is in meters
print(left_curverad, 'm', right_curverad, 'm', 'center-offset:', center)
return left_curverad, right_curverad, center
# sample invocation of the method: curvature radii (meters) and the car's
# center offset for the fits computed on the test image above.
l_rad, r_rad, center = line_curvature(img_processed, left_fit_orig, right_fit_orig)
# Annotate the frame with the detected lines & enclosing area of the road as a polygon.
def draw_lines(image, warped, left_fit, right_fit, Minv):
    """Fill the lane area in warped space, unwarp it with Minv, and blend it
    onto the original frame. Returns the annotated image."""
    # blank 3-channel canvas in warped space
    warp_zero = np.zeros_like(warped).astype(np.uint8)
    color_warp = np.dstack((warp_zero, warp_zero, warp_zero))
    # evaluate both fits at every image row
    ploty = np.linspace(0, warped.shape[0] - 1, warped.shape[0])
    left_fitx = np.polyval(left_fit, ploty)
    right_fitx = np.polyval(right_fit, ploty)
    # stack the two boundaries into one closed polygon (right side reversed
    # so the points run around the perimeter in order)
    pts_left = np.array([np.transpose(np.vstack([left_fitx, ploty]))])
    pts_right = np.array([np.flipud(np.transpose(np.vstack([right_fitx, ploty])))])
    pts = np.hstack((pts_left, pts_right))
    # paint the lane area onto the warped blank canvas
    cv2.fillPoly(color_warp, np.int_([pts]), (0,255, 0))
    # back to camera perspective, then alpha-blend onto the frame
    newwarp = cv2.warpPerspective(color_warp, Minv, (image.shape[1], image.shape[0]))
    result = cv2.addWeighted(image, 1, newwarp, 0.3, 0)
    plt.imshow(result)
    return result
# draw radius of curvature and deviation from center onto the image frame.
def draw_text(original_img, curv_rad, center_dist):
    """Annotate a copy of original_img with the curve radius (meters) and the
    car's left/right offset from the lane center (meters).

    Returns the annotated copy; the input frame is not modified.
    (Cleanup: the unused local `h` was removed.)
    """
    new_img = np.copy(original_img)
    font = cv2.FONT_HERSHEY_SIMPLEX
    text = 'Curve radius: ' + '{:04.2f}'.format(curv_rad) + 'm'
    cv2.putText(new_img, text, (40,70), font, 1.5, (0,255, 0), 2, cv2.LINE_AA)
    # direction of the offset; empty string when exactly centered
    direction = ''
    if center_dist > 0:
        direction = 'right'
    elif center_dist < 0:
        direction = 'left'
    abs_center_dist = abs(center_dist)
    text = '{:04.2f}'.format(abs_center_dist) + 'm ' + direction + ' of the center'
    cv2.putText(new_img, text, (40,120), font, 1.5, (200,255,155), 2, cv2.LINE_AA)
    return new_img
# sample image annotated with the detected lane area, using the fits and
# inverse perspective matrix computed in the cells above.
draw_lines(sample_img, img_processed, left_fit_orig, right_fit_orig, Minv_orig)
The Line class is used to save the coefficient information for each detection, like the last fit and the last 10 coefficient values. This can be used to average the current fit over the last 10 frames using the weights. This is useful for discarding incorrect detections that seem far off from the usual average values.
We have 'l_line' & 'r_line' for the left & right lines to keep track of this information for the left & right lane lines. We use 'diffs' array in the lines to identify difference from last best fit to ensure weird values are discarded. We perform sanity check before adding any coefficient to the line class.
# Define a class to receive the characteristics of each line detection
class Line():
    """Tracks recent polynomial fits for one lane line and smooths them.

    A new fit is accepted only if its coefficients are close to the current
    best fit (sanity check); the last 10 accepted fits are kept and combined
    by a recency-weighted average in average_fit().
    """
    def __init__(self):
        # was the line detected in the last iteration?
        self.detected = False
        # most recently accepted fit (reference point for the sanity check)
        self.best_fit = None
        # accepted polynomial coefficients, newest last (at most 10 kept)
        self.current_fit = []
        # difference in fit coefficients between last and new fits
        self.diffs = np.array([0, 0, 0], dtype='float')
        # recency weights used when averaging the stored fits
        self.weights = np.arange(1, 10 + 1) / 10

    def set(self, fit):
        """Sanity-check `fit` and record it if plausible.

        Bug fix: the original computed `diffs` but set detected=True on both
        branches and appended the fit unconditionally, so implausible fits
        were never actually discarded. Fits whose coefficients jump too far
        from the last good fit are now rejected.
        """
        if fit is None:
            self.detected = False
            return
        if self.best_fit is not None:
            self.diffs = np.abs(fit - self.best_fit)
            if (self.diffs[0] > 0.001 or
                    self.diffs[1] > 1.0 or
                    self.diffs[2] > 100.):
                # coefficients jumped too far from the last good fit: reject
                self.detected = False
                return
        self.detected = True
        self.best_fit = fit
        self.current_fit.append(fit)
        if len(self.current_fit) > 10:
            # throw out old fits, keep newest 10
            self.current_fit = self.current_fit[-10:]

    def average_fit(self):
        """Recency-weighted average of the stored fits (newest weighs most)."""
        return np.average(self.current_fit, 0, self.weights[-len(self.current_fit):])
def process_image(img):
    """Full per-frame pipeline: threshold+warp, fit the lane lines,
    sanity-check the fits, smooth via the l_line/r_line history, and return
    the annotated frame (or the unmodified frame if no valid average exists).

    Bug fixes: the blind sliding-window search is now triggered when EITHER
    line was lost (the original condition `l_line.detected or not
    r_line.detected` re-ran the blind search whenever the left line WAS
    detected and crashed otherwise), and the `r_line.avergae_fit()` typo
    (AttributeError) is corrected.
    """
    img_copy = np.copy(img)
    img_processed, M, Minv = pipeline(img)
    if not l_line.detected or not r_line.detected:
        # blind search when either line was lost on the previous frame
        left_fit, right_fit, left_lane_inds, right_lane_inds, out_img = sliding_window_fit(img_processed)
    else:
        # otherwise search around the running average of the previous fits
        avg_l = l_line.average_fit()
        avg_r = r_line.average_fit()
        left_fit, right_fit, left_lane_inds, right_lane_inds = polyfit_from_prev_frame(img_processed, avg_l, avg_r)
    # Sanity check: lane width at the image base should be near the expected
    # warped-space width; otherwise discard both fits for this frame.
    y_val = img.shape[0]
    avg_width = 350
    w_margin = 100  # error tolerance in the width
    if left_fit is not None and right_fit is not None:
        left_fit_x = left_fit[0]*y_val**2 + left_fit[1]*y_val + left_fit[2]
        right_fit_x = right_fit[0]*y_val**2 + right_fit[1]*y_val + right_fit[2]
        x_diff = np.abs(left_fit_x - right_fit_x)
        if np.abs(avg_width - x_diff) > w_margin:
            left_fit = None
            right_fit = None
    l_line.set(left_fit)
    r_line.set(right_fit)
    avg_l_best = l_line.average_fit()
    avg_r_best = r_line.average_fit()
    if avg_l_best is not None and avg_r_best is not None:
        l_rad, r_rad, dcenter = line_curvature(img_processed, avg_l_best, avg_r_best)
        output_img = draw_lines(img, img_processed, avg_l_best, avg_r_best, Minv)
        return draw_text(output_img, ((l_rad + r_rad) / 2), dcenter)
    # no usable fit yet: return the unannotated frame
    return img_copy
The code below runs the pipeline on each frame of the video and finally generates a video with the lane annotated. Here's a link to the final video.
# process video frames
# fresh Line trackers so state from earlier cells doesn't leak into the run
l_line = Line()
r_line = Line()
# image_frames = glob.glob('./vid_frame_10/*.jpg')
# for image in image_frames:
# img = cv2.imread(image)
# img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# ret_img = process_image(img)
output = 'output_video.mp4'
# v_input = VideoFileClip('project_video.mp4').subclip(30,46)
v_input = VideoFileClip('project_video.mp4')
# run process_image over every frame and write the annotated video
processed_clip = v_input.fl_image(process_image)
%time processed_clip.write_videofile(output, audio=False)
# Grab up to 41 video frames starting at 22s and dump them into fram_22/
# (used to debug the pipeline on problem frames).
vidcap = cv2.VideoCapture('project_video.mp4')
vidcap.set(0, 22000)  # property 0 = CV_CAP_PROP_POS_MSEC: seek to 22s
count = 0
success = True
while success and (count <= 40):
    success, image = vidcap.read()
    # Bug fix: only write when the read succeeded — at end of stream the
    # original passed image=None to imwrite and crashed.
    if success:
        cv2.imwrite("fram_22/frame%d.jpg" % count, image)  # save frame as JPEG file
        count += 1
print('done')
I think the major problems were in tuning the filters and thresholds to work for various cases. I had to keep saving frames to draw the filters on each image frame and identify the frame which was causing issues or deviation. Any amount of averaging and normalization is of no consequence if the polynomial line detected is not fairly accurate. This means the most important step is identifying the right combination of thresholds for the image. I am concerned about how this would work on images in different weather conditions. Different weather conditions like snow or rain may hamper the visibility of the road (as seen by the camera), making the detection inaccurate. Also, conditions where there are no lane lines, which are fairly common in different places, would render this algorithm ineffective. I think it would be preferable to combine the computer vision based approach with a deep learning approach to make it more robust, and implementing some sort of dynamic thresholding may be more useful as well.